Source: Twitter
user_id - the unique Twitter ID of each user
# Dimensions of the loaded tweet DataFrame as (rows, columns).
df_got.shape
(760660, 88)
# Preview the first rows of the raw DataFrame to inspect the available columns.
df_got.head()
| user_id | status_id | created_at | screen_name | text | source | display_text_width | reply_to_status_id | reply_to_user_id | reply_to_screen_name | is_quote | is_retweet | favorite_count | retweet_count | hashtags | symbols | urls_url | urls_t.co | urls_expanded_url | media_url | media_t.co | media_expanded_url | media_type | ext_media_url | ext_media_t.co | ext_media_expanded_url | ext_media_type | mentions_user_id | mentions_screen_name | lang | quoted_status_id | quoted_text | quoted_created_at | quoted_source | quoted_favorite_count | quoted_retweet_count | quoted_user_id | quoted_screen_name | quoted_name | quoted_followers_count | quoted_friends_count | quoted_statuses_count | quoted_location | quoted_description | quoted_verified | retweet_status_id | retweet_text | retweet_created_at | retweet_source | retweet_favorite_count | retweet_retweet_count | retweet_user_id | retweet_screen_name | retweet_name | retweet_followers_count | retweet_friends_count | retweet_statuses_count | retweet_location | retweet_description | retweet_verified | place_url | place_name | place_full_name | place_type | country | country_code | geo_coords | coords_coords | bbox_coords | status_url | name | location | description | url | protected | followers_count | friends_count | listed_count | statuses_count | favourites_count | account_created_at | verified | profile_url | profile_expanded_url | account_lang | profile_banner_url | profile_background_url | profile_image_url | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | x51554079 | x1118417386150014976 | 2019-04-17 07:34:18 | moevazquez | 👍 on @YouTube: GAME OF THRONES 8x01 Breakdown!... | IFTTT | 99 | NaN | NaN | NaN | False | False | 0 | 0 | NaN | NaN | youtu.be/8YuXkI1xucc | https://t.co/Ig8QyNPtkL | https://youtu.be/8YuXkI1xucc | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | x10228272 | YouTube | en | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NA NA | NA NA | NA NA NA NA NA NA NA NA | https://twitter.com/moevazquez/status/11184173... | lagoon monster | mexico city | darkness comes alive ... | https://t.co/UtWjeVMUCT | False | 4497 | 4984 | 48 | 206478 | 36131 | 2009-06-27 20:58:25 | False | https://t.co/UtWjeVMUCT | https://www.facebook.com/moisesvo | es | https://pbs.twimg.com/profile_banners/51554079... | http://abs.twimg.com/images/themes/theme18/bg.gif | http://pbs.twimg.com/profile_images/9788848932... |
| 1 | x51554079 | x1117994592136249344 | 2019-04-16 03:34:16 | moevazquez | 👍 on @YouTube: Ups and Downs From Game Of Thro... | IFTTT | 77 | NaN | NaN | NaN | False | False | 0 | 0 | NaN | NaN | youtu.be/gAR3gbNXgf4 | https://t.co/CZndqySpWS | https://youtu.be/gAR3gbNXgf4 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | x10228272 | YouTube | en | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NA NA | NA NA | NA NA NA NA NA NA NA NA | https://twitter.com/moevazquez/status/11179945... | lagoon monster | mexico city | darkness comes alive ... | https://t.co/UtWjeVMUCT | False | 4497 | 4984 | 48 | 206478 | 36131 | 2009-06-27 20:58:25 | False | https://t.co/UtWjeVMUCT | https://www.facebook.com/moisesvo | es | https://pbs.twimg.com/profile_banners/51554079... | http://abs.twimg.com/images/themes/theme18/bg.gif | http://pbs.twimg.com/profile_images/9788848932... |
| 2 | x51554079 | x1117987511391981568 | 2019-04-16 03:06:08 | moevazquez | Liked on YouTube: Ups and Downs From Game Of T... | IFTTT | 80 | NaN | NaN | NaN | False | False | 0 | 0 | NaN | NaN | youtu.be/gAR3gbNXgf4 | https://t.co/CZndqySpWS | https://youtu.be/gAR3gbNXgf4 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | en | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NA NA | NA NA | NA NA NA NA NA NA NA NA | https://twitter.com/moevazquez/status/11179875... | lagoon monster | mexico city | darkness comes alive ... | https://t.co/UtWjeVMUCT | False | 4497 | 4984 | 48 | 206478 | 36131 | 2009-06-27 20:58:25 | False | https://t.co/UtWjeVMUCT | https://www.facebook.com/moisesvo | es | https://pbs.twimg.com/profile_banners/51554079... | http://abs.twimg.com/images/themes/theme18/bg.gif | http://pbs.twimg.com/profile_images/9788848932... |
| 3 | x51554079 | x1118410676475760640 | 2019-04-17 07:07:38 | moevazquez | Liked on YouTube: GAME OF THRONES 8x01 Breakdo... | IFTTT | 102 | NaN | NaN | NaN | False | False | 0 | 0 | NaN | NaN | youtu.be/8YuXkI1xucc | https://t.co/Ig8QyNPtkL | https://youtu.be/8YuXkI1xucc | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | en | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NA NA | NA NA | NA NA NA NA NA NA NA NA | https://twitter.com/moevazquez/status/11184106... | lagoon monster | mexico city | darkness comes alive ... | https://t.co/UtWjeVMUCT | False | 4497 | 4984 | 48 | 206478 | 36131 | 2009-06-27 20:58:25 | False | https://t.co/UtWjeVMUCT | https://www.facebook.com/moisesvo | es | https://pbs.twimg.com/profile_banners/51554079... | http://abs.twimg.com/images/themes/theme18/bg.gif | http://pbs.twimg.com/profile_images/9788848932... |
| 4 | x770111944187580416 | x1118417347495251968 | 2019-04-17 07:34:09 | whoisScottyP | @MrLegenDarius unpopular opinion: game of thro... | Twitter for iPhone | 42 | x1118398110810681345 | x37078228 | MrLegenDarius | False | False | 0 | 0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | x37078228 | MrLegenDarius | en | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NA NA | NA NA | NA NA NA NA NA NA NA NA | https://twitter.com/whoisScottyP/status/111841... | scotty p | United States | JC '16 Messiah '20 | NaN | False | 168 | 180 | 0 | 1602 | 25651 | 2016-08-29 04:12:50 | False | NaN | NaN | en | https://pbs.twimg.com/profile_banners/77011194... | NaN | http://pbs.twimg.com/profile_images/1108193461... |
# Summarise columns: names, non-null counts, dtypes and memory usage.
df_got.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 760660 entries, 0 to 760659 Data columns (total 10 columns): user_id 760660 non-null object created_at 760660 non-null object raw_tweets 760660 non-null object source 760660 non-null object character_length 760660 non-null int64 geo_coords 760660 non-null object Timeline 760660 non-null object Dates 760660 non-null datetime64[ns] latitude 760660 non-null float64 longitude 760660 non-null float64 dtypes: datetime64[ns](1), float64(2), int64(1), object(6) memory usage: 58.0+ MB
# Preview the first rows of the current DataFrame.
df_got.head()
| user_id | created_at | raw_tweets | source | character_length | geo_coords | Timeline | Dates | latitude | longitude | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | x51554079 | 2019-04-17 07:34:18 | 👍 on @YouTube: GAME OF THRONES 8x01 Breakdown!... | IFTTT | 99 | NA NA | 2019-04-17 | 2019-04-17 | 0.0 | 0.0 |
| 1 | x51554079 | 2019-04-16 03:34:16 | 👍 on @YouTube: Ups and Downs From Game Of Thro... | IFTTT | 77 | NA NA | 2019-04-16 | 2019-04-16 | 0.0 | 0.0 |
| 2 | x51554079 | 2019-04-16 03:06:08 | Liked on YouTube: Ups and Downs From Game Of T... | IFTTT | 80 | NA NA | 2019-04-16 | 2019-04-16 | 0.0 | 0.0 |
| 3 | x51554079 | 2019-04-17 07:07:38 | Liked on YouTube: GAME OF THRONES 8x01 Breakdo... | IFTTT | 102 | NA NA | 2019-04-17 | 2019-04-17 | 0.0 | 0.0 |
| 4 | x770111944187580416 | 2019-04-17 07:34:09 | @MrLegenDarius unpopular opinion: game of thro... | Twitter for iPhone | 42 | NA NA | 2019-04-17 | 2019-04-17 | 0.0 | 0.0 |
# Re-check column names, non-null counts, dtypes and memory usage.
df_got.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 760660 entries, 0 to 760659 Data columns (total 13 columns): user_id 760660 non-null object created_at 760660 non-null object raw_tweets 760660 non-null object source 760660 non-null object character_length 760660 non-null int64 geo_coords 760660 non-null object Timeline 760660 non-null object Dates 760660 non-null datetime64[ns] latitude 760660 non-null float64 longitude 760660 non-null float64 p_tweets 760660 non-null object text 760660 non-null object tweets_compound_score 760660 non-null float64 dtypes: datetime64[ns](1), float64(3), int64(1), object(8) memory usage: 75.4+ MB
# Descriptive statistics (count, mean, std, quartiles, min/max) for the numeric columns.
df_got.describe()
| character_length | latitude | longitude | tweets_compound_score | |
|---|---|---|---|---|
| count | 760660.000000 | 760660.000000 | 760660.000000 | 760660.000000 |
| mean | 112.319532 | 0.110574 | -0.269721 | 0.053009 |
| std | 63.094604 | 2.056793 | 5.049371 | 0.426940 |
| min | 15.000000 | 0.000000 | -123.230000 | -0.995500 |
| 25% | 67.000000 | 0.000000 | 0.000000 | -0.177900 |
| 50% | 99.000000 | 0.000000 | 0.000000 | 0.000000 |
| 75% | 142.000000 | 0.000000 | 0.000000 | 0.381800 |
| max | 1024.000000 | 56.000000 | 0.000000 | 0.998700 |
# Preview the first rows, including the processed-text and compound-score columns.
df_got.head()
| user_id | created_at | raw_tweets | source | character_length | geo_coords | Timeline | Dates | latitude | longitude | p_tweets | text | tweets_compound_score | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | x51554079 | 2019-04-17 07:34:18 | 👍 on @YouTube: GAME OF THRONES 8x01 Breakdown!... | IFTTT | 99 | NA NA | 2019-04-17 | 2019-04-17 | 0.0 | 0.0 | 👍 on game of thrones x breakdown night king s... | explain breakdown night game x symbol throne king | 0.0000 |
| 1 | x51554079 | 2019-04-16 03:34:16 | 👍 on @YouTube: Ups and Downs From Game Of Thro... | IFTTT | 77 | NA NA | 2019-04-16 | 2019-04-16 | 0.0 | 0.0 | 👍 on ups and downs from game of thrones | game down throne up | 0.0000 |
| 2 | x51554079 | 2019-04-16 03:06:08 | Liked on YouTube: Ups and Downs From Game Of T... | IFTTT | 80 | NA NA | 2019-04-16 | 2019-04-16 | 0.0 | 0.0 | liked on youtube ups and downs from game of th... | down up game like throne | 0.4215 |
| 3 | x51554079 | 2019-04-17 07:07:38 | Liked on YouTube: GAME OF THRONES 8x01 Breakdo... | IFTTT | 102 | NA NA | 2019-04-17 | 2019-04-17 | 0.0 | 0.0 | liked on youtube game of thrones x breakdown n... | explain breakdown night game x symbol like thr... | 0.4215 |
| 4 | x770111944187580416 | 2019-04-17 07:34:09 | @MrLegenDarius unpopular opinion: game of thro... | Twitter for iPhone | 42 | NA NA | 2019-04-17 | 2019-04-17 | 0.0 | 0.0 | unpopular opinion game of thrones edition | unpopular game opinion edit throne | 0.0000 |
# Tweets whose compound score is strictly above zero, and their share of
# the whole dataset expressed as a percentage.
pos = df_got.loc[df_got['tweets_compound_score'].gt(0.0)]
pos_per = (pos.shape[0] / df_got.shape[0]) * 100
pos_per
38.127678594904424
# Tweets whose compound score is exactly zero, and their share of the
# whole dataset expressed as a percentage.
neu = df_got.loc[df_got['tweets_compound_score'].eq(0.0)]
neu_per = (neu.shape[0] / df_got.shape[0]) * 100
neu_per
33.291483711513685
# Tweets whose compound score is strictly below zero, and their share of
# the whole dataset expressed as a percentage.
neg = df_got.loc[df_got['tweets_compound_score'].lt(0.0)]
neg_per = (neg.shape[0] / df_got.shape[0]) * 100
neg_per
28.58083769358189
# Distribution of the `character_length` column, with a rug mark per observation.
# NOTE(review): sns.distplot is deprecated in newer seaborn releases; switch to
# histplot/displot (plus rugplot) when the environment is upgraded.
chr_length = df_got['character_length']
plt.figure(figsize=(10, 10))
sns.distplot(chr_length, rug=True);
# Distribution of the `tweets_compound_score` column.
# NOTE(review): sns.distplot is deprecated in newer seaborn releases; switch to
# histplot/displot when the environment is upgraded.
cpd_score = df_got['tweets_compound_score']
plt.figure(figsize=(10, 10))
sns.distplot(cpd_score);
# Summarise the `episode_5` subset: row count, columns, dtypes and memory usage.
episode_5.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 111332 entries, 487706 to 648692 Data columns (total 13 columns): user_id 111332 non-null object created_at 111332 non-null object raw_tweets 111332 non-null object source 111332 non-null object character_length 111332 non-null int64 geo_coords 111332 non-null object Timeline 111332 non-null object Dates 111332 non-null datetime64[ns] latitude 111332 non-null float64 longitude 111332 non-null float64 p_tweets 111332 non-null object text 111332 non-null object tweets_compound_score 111332 non-null float64 dtypes: datetime64[ns](1), float64(3), int64(1), object(8) memory usage: 11.9+ MB
# Preview the first rows of the `episode_5` subset.
episode_5.head()
| user_id | created_at | raw_tweets | source | character_length | geo_coords | Timeline | Dates | latitude | longitude | p_tweets | text | tweets_compound_score | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 487706 | x1122289898889912320 | 2019-05-12 02:59:41 | @JosephJMiccolis And what happens on Game of T... | Twitter for Android | 36 | NA NA | 2019-05-12 | 2019-05-12 | 0.0 | 0.0 | and what happens on game of thrones | happen throne game | 0.0000 |
| 487707 | x439594110 | 2019-05-12 02:59:00 | I love Game of Thrones but here<U+2019>s my in... | Twitter for iPhone | 279 | NA NA | 2019-05-12 | 2019-05-12 | 0.0 | 0.0 | i love game of thrones but hereus my interesti... | iron take rebellion game care interest region ... | 0.2772 |
| 487708 | x1016784296 | 2019-05-12 02:58:56 | starting the game of thrones finallllly | Twitter for iPhone | 39 | NA NA | 2019-05-12 | 2019-05-12 | 0.0 | 0.0 | starting the game of thrones finallllly | game start throne | 0.0000 |
| 487709 | x502333392 | 2019-05-12 02:58:42 | I need help. I<U+2019>m high and idk if I shou... | Twitter for iPhone | 155 | NA NA | 2019-05-12 | 2019-05-12 | 0.0 | 0.0 | i need help ium high and idk if i should watch... | play game help watch high want u kind show thr... | 0.3528 |
| 487710 | x164708941 | 2019-05-12 02:58:11 | My mom moved up Mother<U+2019>s Day dinner bec... | Twitter for iPhone | 64 | NA NA | 2019-05-12 | 2019-05-12 | 0.0 | 0.0 | my mom moved up motherus day dinner because of... | game day dinner throne move | 0.0000 |
# Print each topic of `ldamodel` with its ten highest-weighted words.
# Each entry is printed as returned by print_topics: a (topic_id, weighted-word string) pair.
# NOTE(review): `ldamodel` is built elsewhere — presumably a gensim LdaModel; confirm.
topics = ldamodel.print_topics(num_words=10)
for topic in topics:
    # The loop body must be indented; at column 0 it is an IndentationError.
    print(topic)
(0, '0.048*"final" + 0.034*"petit" + 0.028*"writer" + 0.028*"write" + 0.024*"thing" + 0.023*"start" + 0.011*"twitter" + 0.010*"first" + 0.010*"enjoy" + 0.010*"tweet"') (1, '0.025*"night" + 0.020*"spoiler" + 0.018*"happen" + 0.015*"right" + 0.014*"tonight" + 0.012*"dragon" + 0.010*"great" + 0.009*"disappoint" + 0.009*"point" + 0.009*"bitch"')
# Line plot of `plot_score` on a large (30 x 20 inch) canvas.
# NOTE(review): `plot_score` is built elsewhere — presumably a pandas Series of
# compound scores indexed by Timeline; confirm before relying on the axis labels.
plt.figure(figsize=(30, 20))
# Fixed y-range for the compound-score axis.
plt.ylim(-0.025, 0.14)
plt.xlabel('Timeline', fontsize=30)
plt.ylabel('Compound Score', fontsize=30)
# Semi-transparent blue line.
plot_score.plot(kind='line', color='b', alpha=0.5)
plt.grid()
plt.show();
# Word cloud for the text returned by winterfell(x) (helper defined elsewhere).
text = winterfell(x)

# Default word-cloud stopwords plus words too generic to be informative here.
stopwords = set(STOPWORDS) | {
    'throne', 'game', 'go', 'see', 'season', 'think', 'one', 'watch',
    'fan', 'time', 'start', 'maybe', 'scene', 'anyone', 'really', 'still',
    'already', 'anything', 'never', 'everyone', 'even',
}

# Build the cloud on a black background.
wc = WordCloud(stopwords=stopwords, background_color='black').generate(text)

# Render the cloud without axes.
plt.figure(figsize=(20, 10))
plt.imshow(wc, interpolation='bilinear')
plt.axis("off")
plt.show()
# Word cloud for the text returned by seven_kingdoms(x) (helper defined elsewhere).
text = seven_kingdoms(x)

# Default word-cloud stopwords plus words too generic to be informative here.
stopwords = set(STOPWORDS) | {
    'throne', 'game', 'go', 'start', 'show', 'time', 'fan', 'new',
    'see', 'know', 'watch', 'one', 'think', 'tonight', 'say', 'good',
    'want', 'still', 'end', 'already', 'anything', 'never', 'everyone',
    'even',
}

# Build the cloud on a black background.
wc = WordCloud(stopwords=stopwords, background_color='black').generate(text)

# Render the cloud without axes.
plt.figure(figsize=(20, 10))
plt.imshow(wc, interpolation='bilinear')
plt.axis("off")
plt.show()
# Word cloud for the text returned by long_knight(x) (helper defined elsewhere).
text = long_knight(x)

# Default word-cloud stopwords plus words too generic to be informative here.
stopwords = set(STOPWORDS) | {
    'throne', 'game', 'go', 'final', 'twitter', 'tonight', 'think',
    'time', 'show', 'good', 'end', 'new', 'come', 'fan', 'start', 'know',
    'one', 'say', 'spoiler', 'best', 'need', 'want', 'talk', 'see',
    'today', 'last', 'catch',
}

# Build the cloud on a black background (contour_width is passed although
# no mask is supplied in this cell).
wc = WordCloud(
    stopwords=stopwords,
    contour_width=2,
    background_color='black',
).generate(text)

# Render the cloud without axes.
plt.figure(figsize=(20, 10))
plt.imshow(wc, interpolation='bilinear')
plt.axis("off")
plt.show()
# Masked word cloud for the text returned by last_of_starks(x) (helper
# defined elsewhere), shaped by cup.jpg and recoloured from that image.
text = last_of_starks(x)

# Default word-cloud stopwords plus words too generic to be informative here.
stopwords = set(STOPWORDS) | {
    'throne', 'game', 'go', 'final', 'tonight', 'time', 'season', 'see',
    'say', 'start', 'watch', 'via', 'make', 'show', 'new', 'anyone',
    'really', 'still', 'already', 'anything', 'never', 'everyone', 'even',
    'one', 'end',
}

# The mask image is read from the current working directory.
d = getcwd()
cup_mask = np.array(Image.open(path.join(d, "cup.jpg")))

wc = WordCloud(
    stopwords=stopwords,
    mask=cup_mask,
    contour_width=2,
    contour_color='grey',
    background_color='black',
)
wc.generate(text)

# NOTE(review): the file is written before the recolour step below, so the
# saved image will not carry the mask-derived colours — confirm this is intended.
wc.to_file("img/cup.jpg")

# Colour the words from the mask image and render without axes.
image_color = ImageColorGenerator(cup_mask)
plt.figure(figsize=(20, 10))
plt.imshow(wc.recolor(color_func=image_color), interpolation='bilinear')
plt.axis("off")
plt.show()
# Masked word cloud for the text returned by the_bells(x) (helper defined
# elsewhere), shaped by bell.png with an orange contour.
text = the_bells(x)

# Default word-cloud stopwords plus words too generic to be informative here.
stopwords = set(STOPWORDS) | {
    'throne', 'game', 'new', 'petit', 'via', 'one', 'season', 'fan',
    'time', 'theory', 'last', 'see', 'tonight', 'good', 'think', 'say',
    'want', 'show', 'know', 'watch', 'go', 'dont',
}

# The mask image is read from the current working directory.
d = getcwd()
bell_mask = np.array(Image.open(path.join(d, "bell.png")))

wc = WordCloud(
    stopwords=stopwords,
    mask=bell_mask,
    contour_width=2,
    contour_color='orange',
)
wc.generate(text)
wc.to_file("img/bell.png")

# No image-based recolouring is applied in this cell; the cloud keeps its
# default colours. Render without axes.
plt.figure(figsize=(20, 10))
plt.imshow(wc, interpolation='bilinear')
plt.axis("off")
plt.show()
# Masked word cloud for the text returned by iron_throne(x) (helper defined
# elsewhere), shaped by throne.jpg with a firebrick contour.
text = iron_throne(x)

# Default word-cloud stopwords plus words too generic to be informative here.
stopwords = set(STOPWORDS) | {
    'throne', 'game', 'new', 'petit', 'via', 'one', 'season', 'fan',
    'time', 'theory', 'last', 'see', 'tonight', 'good', 'think', 'say',
    'want', 'show', 'know', 'watch', 'go', 'dont',
}

# The mask image is read from the current working directory.
d = getcwd()
throne_mask = np.array(Image.open(path.join(d, "throne.jpg")))

wc = WordCloud(
    stopwords=stopwords,
    mask=throne_mask,
    contour_width=2,
    contour_color='firebrick',
    background_color='black',
)
wc.generate(text)

# NOTE(review): this rebinds `throne_mask` to a colour generator that is never
# passed to wc.recolor, so the displayed cloud keeps its default colours —
# confirm whether a recolour step was intended.
throne_mask = ImageColorGenerator(throne_mask)

# Render the cloud without axes.
plt.figure(figsize=(15, 10))
plt.imshow(wc, interpolation='bilinear')
plt.axis("off")
plt.show()